
def snv2vcf_cmd(cargs):
    import numpy as NP
    import pandas as pd
    from scsnvpy import snvcmp
    import sys, argparse, os
    import itertools

    parser = argparse.ArgumentParser(description='Create a minimal VCF and mtx matrices from SNV data by removing potential edits in REDIportal or overapping Alu elements')
    parser.add_argument('-a', '--max-af', help='Maximum allele fraction (to remove homozygous alleles)', type=float, default=0.999)
    parser.add_argument('-o', '--output', help='Output folder', type=str)

    parser.add_argument('-r', '--ref', help='Ref lengths text generated by scsnv index (to generate vcf header)', type=str)
    parser.add_argument('-f', '--fasta', help='Fasta reference file (for vcf header)', type=str)
    egroup = parser.add_mutually_exclusive_group(required=True)
    egroup.add_argument('-u', '--all', help='Keep all non-homozygous', action='store_true')
    egroup.add_argument('-e', '--remove-edits', help='Filter strand corrected A-to-G changes that overlap REDIportal and/or Alu elements', action='store_true')
    egroup.add_argument('-n', '--only-edits', help='Filter strand corrected A-to-G changes that overlap REDIportal and/or Alu elements', action='store_true')
    egroup.add_argument('-k', '--keep-annotated', help='Only keep annotated edits (1000 genomes)', action='store_true')
    egroup.add_argument('-b', '--keep-unannotated', help='Only keep unannotated edits (1000 genomes)', action='store_true')

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-c', '--csv', help='Write CSV files compatible with scSplit', action='store_true')
    group.add_argument('-m', '--mtx', help='Write vartrix like MTX files compatible with souporcell', action='store_true')
    group.add_argument('-v', '--vireo', help='Write vartrix like sparse MTX files compatible with vireo', action='store_true')

    parser.add_argument('annotated', help='Annotated mutation flammkuchen/deepdish file')
    args = parser.parse_args(cargs[1:])
    print(args)

    sdata = args.annotated
    gt_dir = args.output
    rdata = args.ref
    data = snvcmp.SampleData(sdata, 'scSNV')
    if not os.path.isdir(gt_dir):
        os.mkdir(gt_dir)

    snvs = data.snvs
    keep = (snvs.strand_AF <= args.max_af)
    r = (~keep).sum()
    p = 100.0 * r / len(keep)
    print(f'Removing {r} homozygous SNVs [{p:.2f}%]')

    if args.remove_edits or args.only_edits:
        snvs['repeat_type'] = ''
        snvs.loc[snvs.rep_family != '', 'repeat_type'] = 'Other'
        snvs.loc[snvs.rep_family.str.contains('Alu'), 'repeat_type'] = 'Alu'
        snvs['potential_edit'] = (snvs.strand_ref == 'A') & (snvs.strand_alt == 'G') & ((snvs.repeat_type == 'Alu') | snvs.REDIportal)
        if args.remove_edits:
            print("Removing edits")
            keep &= ~snvs['potential_edit']
        else:
            print("Only keeping edits")
            keep &= snvs['potential_edit']
    elif args.keep_annotated:
        print("Only keeping annotated SNVs")
        keep &= snvs.g1000
    elif args.keep_unannotated:
        print("Only keeping unannotated SNVs")
        keep &= ~snvs.g1000


    fsnvs = snvs[keep].copy()

    print(f'Using {len(fsnvs)} / {len(snvs)} SNVs for genotyping')

    ridx = fsnvs.snv_idx.values.copy()
    tref = data.strand_ref[ridx].toarray()
    talt = data.strand_alt[ridx].toarray()

    if args.mtx:
        tref = data.strand_ref[ridx].toarray()
        talt = data.strand_alt[ridx].toarray()
        def write_mtx(fout, mat):
            with open(fout, 'w') as fp:
                fp.write('%%MatrixMarket matrix coordinate integer general\n')
                fp.write('%\n')
                fp.write(f'{mat.shape[0]} {mat.shape[1]} {mat.shape[0] * mat.shape[1]}\n')
                for i in range(mat.shape[0]):
                    for j in range(mat.shape[1]):
                        fp.write(f'{i + 1} {j + 1} {mat[i, j]}\n')

        print("Writing ref matrix")
        write_mtx(os.path.join(gt_dir, f'refs.mtx'), tref)
        print("Writing alt matrix")
        write_mtx(os.path.join(gt_dir, f'alts.mtx'), talt)
        print("Writing barcodes")
        if not os.path.isfile(f'{gt_dir}/barcodes.txt'):
            fout = open(f'{gt_dir}/barcodes.txt', 'w')
            for b in data.barcodes:
                fout.write(b + "\n")
            fout.close()
    elif args.vireo:
        from scipy.io import mmwrite
        from scipy.sparse import csr_matrix
        print("Writing ref matrix")
        mmwrite(os.path.join(gt_dir, f'refs.mtx'), data.strand_ref[ridx])
        print("Writing alt matrix")
        mmwrite(os.path.join(gt_dir, f'alts.mtx'), data.strand_alt[ridx])
        print("Writing barcodes")
        if not os.path.isfile(f'{gt_dir}/barcodes.txt'):
            fout = open(f'{gt_dir}/barcodes.txt', 'w')
            for b in data.barcodes:
                fout.write(b + "\n")
            fout.close()
    elif args.csv:
        tref = data.strand_ref[ridx].toarray()
        talt = data.strand_alt[ridx].toarray()
        rows = [f'{i[0]}:{i[1] + 1}' for i in fsnvs.index]
        print("Writing reference allele csv")
        df = pd.DataFrame(data=tref, columns=data.barcodes, index=rows)
        df.to_csv(f'{gt_dir}/refs.csv')

        print("Writing alternative allele csv")
        df = pd.DataFrame(data=talt, columns=data.barcodes, index=rows)
        df.to_csv(f'{gt_dir}/alts.csv')


    rdata = pd.read_csv(rdata, sep='\t', names=['chrom', 'rlen', 'comment'])
    print("Writing VCF")
    with open(f'{gt_dir}/snvs.vcf', 'w') as fp:
        fp.write('##fileformat=VCFv4.2\n')
        fp.write(f'##reference={args.fasta}\n')
        for r in rdata.itertuples():
            fp.write(f'##contig=<ID={r.chrom},length={r.rlen}>\n')
        fp.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n")
        for i, r in fsnvs.iterrows():
            fp.write(f'{i[0]}\t{i[1] + 1}\tscsnv_idx_{r.snv_idx}\t{r.ref}\t{r.alt}\t.\tPASS\t.\n')

    print("Writing SNV csv file")
    fsnvs.to_csv(f'{gt_dir}/snvs.csv')
